





<!DOCTYPE html>
<html class="writer-html5" lang="zh-CN" >
<head>
  <meta charset="utf-8">
  
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  
  <title>在Cuda上部署一个量化模型 &mdash; tvm 0.8.dev1982 文档</title>
  

  
  <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/css/bootstrap.min.css" integrity="sha384-Gn5384xqQ1aoWXA+058RXPxPg6fy4IWvTNh0E263XmFcJlSAwiGgFAW/dAiS6JXm" crossorigin="anonymous">
  <link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
  <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
  <link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
  <link rel="stylesheet" href="../../_static/gallery.css" type="text/css" />
  <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
  <link rel="stylesheet" href="../../_static/css/tlcpack_theme.css" type="text/css" />

  
  
    <link rel="shortcut icon" href="../../_static/tvm-logo-square.png"/>
  

  
  
  
  
    
      <script type="text/javascript" id="documentation_options" data-url_root="../../" src="../../_static/documentation_options.js"></script>
        <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
        <script src="../../_static/jquery.js"></script>
        <script src="../../_static/underscore.js"></script>
        <script src="../../_static/doctools.js"></script>
        <script src="../../_static/translations.js"></script>
    
    <script type="text/javascript" src="../../_static/js/theme.js"></script>

    
    <script type="text/javascript" src="../../_static/js/tlcpack_theme.js"></script>
    <link rel="index" title="索引" href="../../genindex.html" />
    <link rel="search" title="搜索" href="../../search.html" />
    <link rel="next" title="在CPU上部署Hugging Face Pruned模型" href="deploy_sparse.html" />
    <link rel="prev" title="使用TVM部署预先量化的框架模型-第3部分（TFLite）" href="deploy_prequantized_tflite.html" /> 
</head>

<body class="wy-body-for-nav">

   
  <div class="wy-grid-for-nav">
    
    
<header class="header">
    <div class="innercontainer">
      <div class="headerInner d-flex justify-content-between align-items-center">
          <div class="headerLogo">
               <a href="https://tvm.apache.org/"><img src=https://tvm.apache.org/assets/images/logo.svg alt="logo"></a>
          </div>

          <div id="headMenu" class="headerNav">
            <button type="button" id="closeHeadMenu" class="navCloseBtn"><img src="../../_static/img/close-icon.svg" alt="Close"></button>
             <ul class="nav">
                <li class="nav-item">
                   <a class="nav-link" href=https://tvm.apache.org/community>Community</a>
                </li>
                <li class="nav-item">
                   <a class="nav-link" href=https://tvm.apache.org/download>Download</a>
                </li>
                <li class="nav-item">
                   <a class="nav-link" href=https://tvm.apache.org/vta>VTA</a>
                </li>
                <li class="nav-item">
                   <a class="nav-link" href=https://tvm.apache.org/blog>Blog</a>
                </li>
                <li class="nav-item">
                   <a class="nav-link" href=https://tvm.apache.org/docs>Docs</a>
                </li>
                <li class="nav-item">
                   <a class="nav-link" href=https://tvmconf.org>Conference</a>
                </li>
                <li class="nav-item">
                   <a class="nav-link" href=https://github.com/apache/tvm/>Github</a>
                </li>
                <li class="nav-item">
                   <a class="nav-link" href=https://tvmchinese.github.io/declaration_zh_CN.html>About-Translators</a>
                </li>
             </ul>
               <div class="responsivetlcdropdown">
                 <button type="button" class="btn-link">
                   ASF
                 </button>
                 <ul>
                     <li>
                       <a href=https://apache.org/>Apache Homepage</a>
                     </li>
                     <li>
                       <a href=https://www.apache.org/licenses/>License</a>
                     </li>
                     <li>
                       <a href=https://www.apache.org/foundation/sponsorship.html>Sponsorship</a>
                     </li>
                     <li>
                       <a href=https://www.apache.org/security/>Security</a>
                     </li>
                     <li>
                       <a href=https://www.apache.org/foundation/thanks.html>Thanks</a>
                     </li>
                     <li>
                       <a href=https://www.apache.org/events/current-event>Events</a>
                     </li>
                     <li>
                       <a href=https://www.zhihu.com/column/c_1429578595417563136>Zhihu</a>
                     </li>
                 </ul>
               </div>
          </div>
            <div class="responsiveMenuIcon">
              <button type="button" id="menuBtn" class="btn-menu"><img src="../../_static/img/menu-icon.svg" alt="Menu Icon"></button>
            </div>

            <div class="tlcDropdown">
              <div class="dropdown">
                <button type="button" class="btn-link dropdown-toggle" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">
                  ASF
                </button>
                <div class="dropdown-menu dropdown-menu-right">
                  <ul>
                     <li>
                       <a href=https://apache.org/>Apache Homepage</a>
                     </li>
                     <li>
                       <a href=https://www.apache.org/licenses/>License</a>
                     </li>
                     <li>
                       <a href=https://www.apache.org/foundation/sponsorship.html>Sponsorship</a>
                     </li>
                     <li>
                       <a href=https://www.apache.org/security/>Security</a>
                     </li>
                     <li>
                       <a href=https://www.apache.org/foundation/thanks.html>Thanks</a>
                     </li>
                     <li>
                       <a href=https://www.apache.org/events/current-event>Events</a>
                     </li>
                     <li>
                       <a href=https://www.zhihu.com/column/c_1429578595417563136>Zhihu</a>
                     </li>
                  </ul>
                </div>
              </div>
          </div>
       </div>
    </div>
 </header>
 
    <nav data-toggle="wy-nav-shift" class="wy-nav-side fixed">
      <div class="wy-side-scroll">
        <div class="wy-side-nav-search" >
          

          
            <a href="../../index.html">
          

          
            
            <img src="../../_static/tvm-logo-small.png" class="logo" alt="Logo"/>
          
          </a>

          
            
            
                <div class="version">
                  0.8.dev1982
                </div>
            
          

          
<div role="search">
  <form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
    <input type="text" name="q" placeholder="Search docs" />
    <input type="hidden" name="check_keywords" value="yes" />
    <input type="hidden" name="area" value="default" />
  </form>
</div>

          
        </div>

        
        <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
          
            
            
              
            
            
              <p class="caption" role="heading"><span class="caption-text">如何开始</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../install/index.html">安装 TVM</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../contribute/index.html">贡献者指南</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">用户引导</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="../../tutorial/index.html">User Tutorial</a></li>
<li class="toctree-l1 current"><a class="reference internal" href="../index.html">How To Guides</a><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="../compile_models/index.html">编译深度学习模型</a></li>
<li class="toctree-l2 current"><a class="reference internal" href="../deploy/index.html">TVM 部署模型和集成</a><ul class="current">
<li class="toctree-l3"><a class="reference internal" href="../deploy/index.html#build-the-tvm-runtime-library">构建 TVM 运行 runtime 库</a></li>
<li class="toctree-l3"><a class="reference internal" href="../deploy/index.html#cross-compile-the-tvm-runtime-for-other-architectures">为其它架构交叉编译TVM runtime</a></li>
<li class="toctree-l3"><a class="reference internal" href="../deploy/index.html#optimize-and-tune-models-for-target-devices">针对目标设备优化和调整模型</a></li>
<li class="toctree-l3"><a class="reference internal" href="../deploy/index.html#deploy-optimized-model-on-target-devices">在目标设备上部署优化的模型</a></li>
<li class="toctree-l3 current"><a class="reference internal" href="../deploy/index.html#additional-deployment-how-tos">其他部署方式</a><ul class="current">
<li class="toctree-l4 current"><a class="reference internal" href="index.html">Deploy Deep Learning Models</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../work_with_relay/index.html">Work With Relay</a></li>
<li class="toctree-l2"><a class="reference internal" href="../work_with_schedules/index.html">Work With Tensor Expression and Schedules</a></li>
<li class="toctree-l2"><a class="reference internal" href="../optimize_operators/index.html">优化张量算子</a></li>
<li class="toctree-l2"><a class="reference internal" href="../tune_with_autotvm/index.html">Auto-Tune with Templates and AutoTVM</a></li>
<li class="toctree-l2"><a class="reference internal" href="../tune_with_autoscheduler/index.html">Use AutoScheduler for Template-Free Scheduling</a></li>
<li class="toctree-l2"><a class="reference internal" href="../work_with_microtvm/index.html">Work With microTVM</a></li>
<li class="toctree-l2"><a class="reference internal" href="../extend_tvm/index.html">Extend TVM</a></li>
<li class="toctree-l2"><a class="reference internal" href="../profile/index.html">Profile Models</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../errors.html">Handle TVM Errors</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../faq.html">常见提问</a></li>
</ul>
</li>
</ul>
<p class="caption" role="heading"><span class="caption-text">开发者引导</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../dev/tutorial/index.html">Developer Tutorial</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../dev/how_to/how_to.html">开发者指南</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">架构指南</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../arch/index.html">Design and Architecture</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">主题引导</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../topic/microtvm/index.html">microTVM：裸机使用TVM</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../topic/vta/index.html">VTA: Versatile Tensor Accelerator</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">参考指南</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../reference/langref/index.html">语言参考</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../reference/api/python/index.html">Python API</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../reference/api/links.html">Other APIs</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../reference/publications.html">Publications</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../genindex.html">索引</a></li>
</ul>

            
          
        </div>
        
      </div>
    </nav>

    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
      
      <nav class="wy-nav-top" aria-label="top navigation" data-toggle="wy-nav-top">
        
            <div class="togglemenu">

            </div>
            <div class="nav-content">
              <!-- tvm -->
              Table of content
            </div>
        
      </nav>


      <div class="wy-nav-content">
        
        <div class="rst-content">
        

          




















<div role="navigation" aria-label="breadcrumbs navigation">

  <ul class="wy-breadcrumbs">
    
      <li><a href="../../index.html">Docs</a> <span class="br-arrow">></span></li>
        
          <li><a href="../index.html">How To Guides</a> <span class="br-arrow">></span></li>
        
          <li><a href="../deploy/index.html">TVM 部署模型和集成</a> <span class="br-arrow">></span></li>
        
          <li><a href="index.html">Deploy Deep Learning Models</a> <span class="br-arrow">></span></li>
        
      <li>在Cuda上部署一个量化模型</li>
    
    
      <li class="wy-breadcrumbs-aside">
        
            
            <a href="../../_sources/how_to/deploy_models/deploy_quantized.rst.txt" rel="nofollow"> <img src="../../_static//img/source.svg" alt="viewsource"/></a>
          
        
      </li>
    
  </ul>

  
  <hr/>
</div>
          <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
           <div itemprop="articleBody">
            
  <div class="sphx-glr-download-link-note admonition note">
<p class="admonition-title">注解</p>
<p>Click <a class="reference internal" href="#sphx-glr-download-how-to-deploy-models-deploy-quantized-py"><span class="std std-ref">here</span></a> to download the full example code</p>
</div>
<div class="sphx-glr-example-title section" id="deploy-a-quantized-model-on-cuda">
<span id="sphx-glr-how-to-deploy-models-deploy-quantized-py"></span><h1>在Cuda上部署一个量化模型<a class="headerlink" href="#deploy-a-quantized-model-on-cuda" title="永久链接至标题">¶</a></h1>
<p><strong>作者</strong>: <a class="reference external" href="https://github.com/vinx13">Wuwei Lin</a></p>
<p>本文是使用TVM进行自动量化的介绍性教程。自动量化是TVM中的量化方式之一。关于TVM量化的更多细节可以在`这里 &lt;<a class="reference external" href="https://discuss.tvm.apache.org/t/quantization-story/3920">https://discuss.tvm.apache.org/t/quantization-story/3920</a>&gt;`_. 找到。在本教程中，我们将在ImageNet上导入GluonCV预训练模型到Relay，量化Relay模型，然后执行推理。</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">tvm</span>
<span class="kn">from</span> <span class="nn">tvm</span> <span class="k">import</span> <span class="n">te</span>
<span class="kn">from</span> <span class="nn">tvm</span> <span class="k">import</span> <span class="n">relay</span>
<span class="kn">import</span> <span class="nn">mxnet</span> <span class="k">as</span> <span class="nn">mx</span>
<span class="kn">from</span> <span class="nn">tvm.contrib.download</span> <span class="k">import</span> <span class="n">download_testdata</span>
<span class="kn">from</span> <span class="nn">mxnet</span> <span class="k">import</span> <span class="n">gluon</span>
<span class="kn">import</span> <span class="nn">logging</span>
<span class="kn">import</span> <span class="nn">os</span>

<span class="n">batch_size</span> <span class="o">=</span> <span class="mi">1</span>
<span class="n">model_name</span> <span class="o">=</span> <span class="s2">&quot;resnet18_v1&quot;</span>
<span class="n">target</span> <span class="o">=</span> <span class="s2">&quot;cuda&quot;</span>
<span class="n">dev</span> <span class="o">=</span> <span class="n">tvm</span><span class="o">.</span><span class="n">device</span><span class="p">(</span><span class="n">target</span><span class="p">)</span>
</pre></div>
</div>
<div class="section" id="prepare-the-dataset">
<h2>准备数据集<a class="headerlink" href="#prepare-the-dataset" title="永久链接至标题">¶</a></h2>
<p>我们将演示如何准备量化的校准数据集。我们首先下载ImageNet的验证集并对数据集进行预处理。</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">calibration_rec</span> <span class="o">=</span> <span class="n">download_testdata</span><span class="p">(</span>
    <span class="s2">&quot;http://data.mxnet.io.s3-website-us-west-1.amazonaws.com/data/val_256_q90.rec&quot;</span><span class="p">,</span>
    <span class="s2">&quot;val_256_q90.rec&quot;</span><span class="p">,</span>
<span class="p">)</span>


<span class="k">def</span> <span class="nf">get_val_data</span><span class="p">(</span><span class="n">num_workers</span><span class="o">=</span><span class="mi">4</span><span class="p">):</span>
    <span class="n">mean_rgb</span> <span class="o">=</span> <span class="p">[</span><span class="mf">123.68</span><span class="p">,</span> <span class="mf">116.779</span><span class="p">,</span> <span class="mf">103.939</span><span class="p">]</span>
    <span class="n">std_rgb</span> <span class="o">=</span> <span class="p">[</span><span class="mf">58.393</span><span class="p">,</span> <span class="mf">57.12</span><span class="p">,</span> <span class="mf">57.375</span><span class="p">]</span>

    <span class="k">def</span> <span class="nf">batch_fn</span><span class="p">(</span><span class="n">batch</span><span class="p">):</span>
        <span class="k">return</span> <span class="n">batch</span><span class="o">.</span><span class="n">data</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">asnumpy</span><span class="p">(),</span> <span class="n">batch</span><span class="o">.</span><span class="n">label</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">asnumpy</span><span class="p">()</span>

    <span class="n">img_size</span> <span class="o">=</span> <span class="mi">299</span> <span class="k">if</span> <span class="n">model_name</span> <span class="o">==</span> <span class="s2">&quot;inceptionv3&quot;</span> <span class="k">else</span> <span class="mi">224</span>
    <span class="n">val_data</span> <span class="o">=</span> <span class="n">mx</span><span class="o">.</span><span class="n">io</span><span class="o">.</span><span class="n">ImageRecordIter</span><span class="p">(</span>
        <span class="n">path_imgrec</span><span class="o">=</span><span class="n">calibration_rec</span><span class="p">,</span>
        <span class="n">preprocess_threads</span><span class="o">=</span><span class="n">num_workers</span><span class="p">,</span>
        <span class="n">shuffle</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
        <span class="n">batch_size</span><span class="o">=</span><span class="n">batch_size</span><span class="p">,</span>
        <span class="n">resize</span><span class="o">=</span><span class="mi">256</span><span class="p">,</span>
        <span class="n">data_shape</span><span class="o">=</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="n">img_size</span><span class="p">,</span> <span class="n">img_size</span><span class="p">),</span>
        <span class="n">mean_r</span><span class="o">=</span><span class="n">mean_rgb</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span>
        <span class="n">mean_g</span><span class="o">=</span><span class="n">mean_rgb</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span>
        <span class="n">mean_b</span><span class="o">=</span><span class="n">mean_rgb</span><span class="p">[</span><span class="mi">2</span><span class="p">],</span>
        <span class="n">std_r</span><span class="o">=</span><span class="n">std_rgb</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span>
        <span class="n">std_g</span><span class="o">=</span><span class="n">std_rgb</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span>
        <span class="n">std_b</span><span class="o">=</span><span class="n">std_rgb</span><span class="p">[</span><span class="mi">2</span><span class="p">],</span>
    <span class="p">)</span>
    <span class="k">return</span> <span class="n">val_data</span><span class="p">,</span> <span class="n">batch_fn</span>
</pre></div>
</div>
<p>校准的数据集应该是一个可迭代对象。我们在Python中将校准数据集定义为生成器对象。在本教程中，我们只使用一些样本进行校准。</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">calibration_samples</span> <span class="o">=</span> <span class="mi">10</span>


<span class="k">def</span> <span class="nf">calibrate_dataset</span><span class="p">():</span>
    <span class="n">val_data</span><span class="p">,</span> <span class="n">batch_fn</span> <span class="o">=</span> <span class="n">get_val_data</span><span class="p">()</span>
    <span class="n">val_data</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>
    <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">batch</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">val_data</span><span class="p">):</span>
        <span class="k">if</span> <span class="n">i</span> <span class="o">*</span> <span class="n">batch_size</span> <span class="o">&gt;=</span> <span class="n">calibration_samples</span><span class="p">:</span>
            <span class="k">break</span>
        <span class="n">data</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="n">batch_fn</span><span class="p">(</span><span class="n">batch</span><span class="p">)</span>
        <span class="k">yield</span> <span class="p">{</span><span class="s2">&quot;data&quot;</span><span class="p">:</span> <span class="n">data</span><span class="p">}</span>
</pre></div>
</div>
</div>
<div class="section" id="import-the-model">
<h2>导入模型<a class="headerlink" href="#import-the-model" title="永久链接至标题">¶</a></h2>
<p>我们使用Relay MxNet前端从Gluon模型库导入一个模型。</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">get_model</span><span class="p">():</span>
    <span class="n">gluon_model</span> <span class="o">=</span> <span class="n">gluon</span><span class="o">.</span><span class="n">model_zoo</span><span class="o">.</span><span class="n">vision</span><span class="o">.</span><span class="n">get_model</span><span class="p">(</span><span class="n">model_name</span><span class="p">,</span> <span class="n">pretrained</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
    <span class="n">img_size</span> <span class="o">=</span> <span class="mi">299</span> <span class="k">if</span> <span class="n">model_name</span> <span class="o">==</span> <span class="s2">&quot;inceptionv3&quot;</span> <span class="k">else</span> <span class="mi">224</span>
    <span class="n">data_shape</span> <span class="o">=</span> <span class="p">(</span><span class="n">batch_size</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="n">img_size</span><span class="p">,</span> <span class="n">img_size</span><span class="p">)</span>
    <span class="n">mod</span><span class="p">,</span> <span class="n">params</span> <span class="o">=</span> <span class="n">relay</span><span class="o">.</span><span class="n">frontend</span><span class="o">.</span><span class="n">from_mxnet</span><span class="p">(</span><span class="n">gluon_model</span><span class="p">,</span> <span class="p">{</span><span class="s2">&quot;data&quot;</span><span class="p">:</span> <span class="n">data_shape</span><span class="p">})</span>
    <span class="k">return</span> <span class="n">mod</span><span class="p">,</span> <span class="n">params</span>
</pre></div>
</div>
</div>
<div class="section" id="quantize-the-model">
<h2>量化模型<a class="headerlink" href="#quantize-the-model" title="永久链接至标题">¶</a></h2>
<p>In quantization, we need to find the scale for each weight and intermediate
feature map tensor of each layer.</p>
<p>For weights, the scales are directly calculated based on the value of the
weights. Two modes are supported: <cite>power2</cite> and <cite>max</cite>. Both modes find the
maximum value within the weight tensor first. In <cite>power2</cite> mode, the maximum
is rounded down to power of two. If the scales of both weights and
intermediate feature maps are power of two, we can leverage bit shifting for
multiplications. This make it computationally more efficient. In <cite>max</cite> mode,
the maximum is used as the scale. Without rounding, <cite>max</cite> mode might have
better accuracy in some cases. When the scales are not powers of two, fixed
point multiplications will be used.</p>
<p>For intermediate feature maps, we can find the scales with data-aware
quantization. Data-aware quantization takes a calibration dataset as the
input argument. Scales are calculated by minimizing the KL divergence between
distribution of activation before and after quantization.
Alternatively, we can also use pre-defined global scales. This saves the time
for calibration. But the accuracy might be impacted.</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">quantize</span><span class="p">(</span><span class="n">mod</span><span class="p">,</span> <span class="n">params</span><span class="p">,</span> <span class="n">data_aware</span><span class="p">):</span>
    <span class="k">if</span> <span class="n">data_aware</span><span class="p">:</span>
        <span class="k">with</span> <span class="n">relay</span><span class="o">.</span><span class="n">quantize</span><span class="o">.</span><span class="n">qconfig</span><span class="p">(</span><span class="n">calibrate_mode</span><span class="o">=</span><span class="s2">&quot;kl_divergence&quot;</span><span class="p">,</span> <span class="n">weight_scale</span><span class="o">=</span><span class="s2">&quot;max&quot;</span><span class="p">):</span>
            <span class="n">mod</span> <span class="o">=</span> <span class="n">relay</span><span class="o">.</span><span class="n">quantize</span><span class="o">.</span><span class="n">quantize</span><span class="p">(</span><span class="n">mod</span><span class="p">,</span> <span class="n">params</span><span class="p">,</span> <span class="n">dataset</span><span class="o">=</span><span class="n">calibrate_dataset</span><span class="p">())</span>
    <span class="k">else</span><span class="p">:</span>
        <span class="k">with</span> <span class="n">relay</span><span class="o">.</span><span class="n">quantize</span><span class="o">.</span><span class="n">qconfig</span><span class="p">(</span><span class="n">calibrate_mode</span><span class="o">=</span><span class="s2">&quot;global_scale&quot;</span><span class="p">,</span> <span class="n">global_scale</span><span class="o">=</span><span class="mf">8.0</span><span class="p">):</span>
            <span class="n">mod</span> <span class="o">=</span> <span class="n">relay</span><span class="o">.</span><span class="n">quantize</span><span class="o">.</span><span class="n">quantize</span><span class="p">(</span><span class="n">mod</span><span class="p">,</span> <span class="n">params</span><span class="p">)</span>
    <span class="k">return</span> <span class="n">mod</span>
</pre></div>
</div>
</div>
<div class="section" id="run-inference">
<h2>运行推理<a class="headerlink" href="#run-inference" title="永久链接至标题">¶</a></h2>
<p>我们创建一个Relay VM来构建和执行模型。</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">run_inference</span><span class="p">(</span><span class="n">mod</span><span class="p">):</span>
    <span class="n">model</span> <span class="o">=</span> <span class="n">relay</span><span class="o">.</span><span class="n">create_executor</span><span class="p">(</span><span class="s2">&quot;vm&quot;</span><span class="p">,</span> <span class="n">mod</span><span class="p">,</span> <span class="n">dev</span><span class="p">,</span> <span class="n">target</span><span class="p">)</span><span class="o">.</span><span class="n">evaluate</span><span class="p">()</span>
    <span class="n">val_data</span><span class="p">,</span> <span class="n">batch_fn</span> <span class="o">=</span> <span class="n">get_val_data</span><span class="p">()</span>
    <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">batch</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">val_data</span><span class="p">):</span>
        <span class="n">data</span><span class="p">,</span> <span class="n">label</span> <span class="o">=</span> <span class="n">batch_fn</span><span class="p">(</span><span class="n">batch</span><span class="p">)</span>
        <span class="n">prediction</span> <span class="o">=</span> <span class="n">model</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
        <span class="k">if</span> <span class="n">i</span> <span class="o">&gt;</span> <span class="mi">10</span><span class="p">:</span>  <span class="c1"># only run inference on a few samples in this tutorial</span>
            <span class="k">break</span>


<span class="k">def</span> <span class="nf">main</span><span class="p">():</span>
    <span class="n">mod</span><span class="p">,</span> <span class="n">params</span> <span class="o">=</span> <span class="n">get_model</span><span class="p">()</span>
    <span class="n">mod</span> <span class="o">=</span> <span class="n">quantize</span><span class="p">(</span><span class="n">mod</span><span class="p">,</span> <span class="n">params</span><span class="p">,</span> <span class="n">data_aware</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
    <span class="n">run_inference</span><span class="p">(</span><span class="n">mod</span><span class="p">)</span>


<span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s2">&quot;__main__&quot;</span><span class="p">:</span>
    <span class="n">main</span><span class="p">()</span>
</pre></div>
</div>
<p class="sphx-glr-script-out">输出:</p>
<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>/tvm_chinese/tvm/python/tvm/relay/build_module.py:333: DeprecationWarning: Please use input parameter mod (tvm.IRModule) instead of deprecated parameter mod (tvm.relay.function.Function)
  DeprecationWarning,
</pre></div>
</div>
<div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-quantized-py">
<div class="sphx-glr-download docutils container">
<p><a class="reference download internal" download="" href="../../_downloads/7810ecf51bfc05f7d5e8a400ac3e815d/deploy_quantized.py"><code class="xref download docutils literal notranslate"><span class="pre">下载</span> <span class="pre">Python</span> <span class="pre">源代码:</span> <span class="pre">deploy_quantized.py</span></code></a></p>
</div>
<div class="sphx-glr-download docutils container">
<p><a class="reference download internal" download="" href="../../_downloads/a269cb38341b190be980a0bd3ea8a625/deploy_quantized.ipynb"><code class="xref download docutils literal notranslate"><span class="pre">下载Jupyter</span> <span class="pre">notebook:</span> <span class="pre">deploy_quantized.ipynb</span></code></a></p>
</div>
</div>
<p class="sphx-glr-signature"><a class="reference external" href="https://sphinx-gallery.github.io">Gallery generated by Sphinx-Gallery</a></p>
</div>
</div>


           </div>
           
          </div>
          

<footer>

    <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
      
        <a href="deploy_sparse.html" class="btn btn-neutral float-right" title="在CPU上部署Hugging Face Pruned模型" accesskey="n" rel="next">下一个 <span class="fa fa-arrow-circle-right"></span></a>
      
      
        <a href="deploy_prequantized_tflite.html" class="btn btn-neutral float-left" title="使用TVM部署预先量化的框架模型-第3部分（TFLite）" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> 上一个</a>
      
    </div>

<div id="button" class="backtop"><img src="../../_static//img/right.svg" alt="backtop"/> </div>
<section class="footerSec">
    <div class="footerHeader">
      <ul class="d-flex align-md-items-center justify-content-between flex-column flex-md-row">
        <li class="copywrite d-flex align-items-center">
          <h5 id="copy-right-info">© 2020 Apache Software Foundation | All right reserved</h5>
        </li>
      </ul>

    </div>

    <ul>
      <li class="footernote">Copyright © 2020 The Apache Software Foundation. Apache TVM, Apache, the Apache feather, and the Apache TVM project logo are either trademarks or registered trademarks of the Apache Software Foundation.</li>
    </ul>

</section>
</footer>
        </div>
      </div>

    </section>

  </div>
  

    <script src="https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.12.9/umd/popper.min.js" integrity="sha384-ApNbgh9B+Y1QKtv3Rn7W3mgPxhU9K/ScQsAP7hUibX39j7fakFPskvXusvfa0b4Q" crossorigin="anonymous"></script>
    <script src="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/js/bootstrap.min.js" integrity="sha384-JZR6Spejh4U02d8jOt6vLEHfe/JQGiRRSQQxSfFWpi1MquVdAyjUar5+76PVCmYl" crossorigin="anonymous"></script>

  </body>
  <script type="text/javascript">
      jQuery(function () {
          SphinxRtdTheme.Navigation.enable(true);
      });
  </script>

  
  
    
    <!-- Theme Analytics -->
    <script>
    (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
      (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
      m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
    })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

    ga('create', 'UA-75982049-2', 'auto');
    ga('send', 'pageview');
    </script>

    
   

</body>
</html>